/*******************************************************************************
File    : NSS_70_Merging.do
Project : Bank Expansion and Moneylender Interest Rates - RDD Evidence from India
Purpose : Clean NSS 70 district names and fuzzy-match to RBI 2005 MOF dataset
Author  : Kannan Narayanaswamy
Updated : 09 Feb 2026
*******************************************************************************/

clear all
set more off

*------------------------------------------------------------------------------
* 1. Load NSS 70 mapping file and map 2013 districts to 2001 parent districts
*------------------------------------------------------------------------------
* Every district that was carved out after 2001 is relabelled with the name of
* the district it belonged to in 2001. Districts not listed below keep their
* own name. All conditions test the original District variable, so the order
* of the replace statements does not matter.
*
* NOTE: the manual correction in section 3 addresses one observation by row
* number. Re-verify that row whenever a mapping in this section is changed.

* Forward slashes are understood by Stata on every platform (backslashes are
* Windows-only and can also swallow a following macro reference).
use "Data/NSS_70_District_Codes.dta", clear

* District codes must be numeric: they serve as idmaster() in reclink below
destring NSS_70_Codes, replace

gen Parent_District = District

* ---- Andaman & Nicobar ----
replace Parent_District = "Andamans"           if District == "North & middle Andaman"

* ---- Arunachal Pradesh ----
replace Parent_District = "Lohit"              if District == "Anjaw"
replace Parent_District = "Lower Subansiri"    if District == "Kurungkumey"
replace Parent_District = "Dibang Valley"      if District == "Lower Dibang Valley"

* ---- Assam ----
replace Parent_District = "Kamrup"             if District == "Guwahati"
replace Parent_District = "Darrang"            if District == "Udalguri"

* ---- Bihar ----
* NOTE(review): both Champaran districts are collapsed to "Champaran" here,
* while section 2 renames "Champaran(E)"/"Champaran(W)" to "Purba Champaran"/
* "Paschim Champaran". Confirm which spelling the RBI file actually uses.
replace Parent_District = "Jehanabad"          if District == "Arwal"
replace Parent_District = "Champaran"          if inlist(District, "Purba Champaran", "Paschim Champaran")

* ---- Chhattisgarh ----
replace Parent_District = "Bastar"             if District == "Narayanpur"
replace Parent_District = "Dantewada"          if District == "Bijapur_22"

* ---- Gujarat ----
replace Parent_District = "Surat"              if District == "Tapi"

* ---- Haryana ----
replace Parent_District = "Faridabad"          if District == "Palwal"
replace Parent_District = "Gurgaon"            if District == "Mewat"

* ---- Jammu & Kashmir (2001 boundaries) ----
replace Parent_District = "Doda"               if inlist(District, "Ramban", "Kishtwar")
replace Parent_District = "Anantnag"           if District == "Kulgam"
replace Parent_District = "Udhampur"           if District == "Reasi"
replace Parent_District = "Baramulla"          if District == "Bandipora"
replace Parent_District = "Srinagar"           if District == "Ganderbal"
* Shopian was carved out of Pulwama (2007), not Anantnag
replace Parent_District = "Pulwama"            if District == "Shopian"
* NOTE(review): Samba was formed mainly from Jammu district with a part of
* Kathua; the original assignment to Kathua is kept - confirm this choice.
replace Parent_District = "Kathua"             if District == "Samba"

* ---- Jharkhand ----
* NOTE(review): the parent is spelled "Paschimi Singhbhum" here but
* "Paschim Singhbhum" in section 2; the fuzzy match presumably absorbs the
* difference - confirm against the RBI file.
replace Parent_District = "Palamu"             if District == "Latehar"
replace Parent_District = "Dumka"              if District == "Jamtara"
replace Parent_District = "Paschimi Singhbhum" if inlist(District, "Seraikela-kharsawan", "Pashchimi Singhbhum")
replace Parent_District = "Hazaribagh"         if District == "Ramgarh"
replace Parent_District = "Ranchi"             if District == "Khunti"
* Simdega was carved out of Gumla (2001), not Dumka
replace Parent_District = "Gumla"              if District == "Simdega"

* ---- Karnataka ----
replace Parent_District = "Kolar"              if District == "Chikkaballapura"
replace Parent_District = "Gulbarga"           if District == "Yadgir"
replace Parent_District = "Bangalore Rural"    if District == "Ramanagar"

* ---- Madhya Pradesh ----
replace Parent_District = "Shahdol"            if District == "Anuppur"
replace Parent_District = "Guna"               if District == "Ashoknagar"
replace Parent_District = "East Nimar"         if District == "Burhanpur"
replace Parent_District = "Jhabua"             if District == "Alirajpur"
* Singrauli was carved out of Sidhi (2008), not Shahdol
replace Parent_District = "Sidhi"              if District == "Singrauli"

* ---- Nagaland ----
replace Parent_District = "Tuensang"           if inlist(District, "Kiphire", "Longleng")
replace Parent_District = "Kohima"             if District == "Peren"

* ---- Punjab ----
replace Parent_District = "Amritsar"           if District == "Tarn Taran"
replace Parent_District = "Sangrur"            if District == "Barnala"

* ---- Tamil Nadu ----
replace Parent_District = "Dharmapuri"         if District == "Krishnagiri"
replace Parent_District = "Perambalur"         if inlist(District, "Ariyalur *", "Perambalur *")

* ---- Uttar Pradesh ----
replace Parent_District = "Etah"               if District == "Kashiramnagar"

* ---- West Bengal ----
replace Parent_District = "Medinpur_old"       if inlist(District, "Pashim Midnapur", "Purba Midnapur")

* From here on, District holds the 2001 parent-district name
drop District
rename Parent_District District

*------------------------------------------------------------------------------
* 2. Fix known district name mismatches between NSS and Census/RBI datasets
*------------------------------------------------------------------------------
* Spelling harmonisation only. No rule's target equals another rule's source,
* so the renames below are independent of one another; they are grouped by
* state for readability.

* ---- Bihar ----
replace District = "Kaimur"            if District == "Bhabua kaimur"
replace District = "Purba Champaran"   if District == "Champaran(E)"
replace District = "Paschim Champaran" if District == "Champaran(W)"

* ---- Gujarat ----
replace District = "Dohad"             if District == "Dahod"

* ---- Jammu & Kashmir ----
replace District = "Baramulla"         if District == "Baramula"

* ---- Jharkhand ----
replace District = "Purba Singhbhum"   if District == "Singhbhum (E)"
replace District = "Paschim Singhbhum" if District == "Singhbhum (W)"

* ---- Madhya Pradesh ----
replace District = "West Nimar"        if District == "Khargoan (W. Nimar)"
replace District = "East Nimar"        if District == "Khandwa (E. Nimar)"

* ---- Odisha ----
replace District = "Boudh"             if District == "Baudh *"
replace District = "Keonjhar"          if District == "Kendujhar"

* ---- Tamil Nadu ----
replace District = "Dharmapuri"        if District == "Dharmapur"

* ---- West Bengal ----
replace District = "Haora"             if District == "Howrah"

* ---- Delhi ----
* RBI data reports at Delhi level, so every Delhi row gets one common label.
* Keyed on State_UT and applied last so it overrides any district-level name.
replace District = "N.C.T Delhi"       if State_UT == "Delhi *"

* Snapshot of the cleaned NSS mapping (the tempfile is not read again within
* this script)
tempfile nss_70_map
save `nss_70_map'

*------------------------------------------------------------------------------
* 3. Fuzzy match to RBI–Census (2005) dataset
*------------------------------------------------------------------------------

* Fuzzy-match on state and district names. NSS_70_Codes identifies master
* rows, ID_RBI identifies rows of the RBI file, and the match score is stored
* in match_score. Forward slashes keep the path portable across platforms.
reclink State_UT District ///
    using "Data/MOF_Data/RBI_2005_Q1/MOF_Census_2005_Q1.dta", ///
    idmaster(NSS_70_Codes) idusing(ID_RBI) ///
    gen(match_score)

* One wrong match that was manually corrected: blank out everything that came
* from the RBI file for that observation and mark it as unmatched.
* NOTE(review): the observation is addressed by row number, which is only
* valid for the row order reclink returns on the current input data. Re-check
* this row after any change to the inputs or to the name cleaning above;
* ideally key it on NSS_70_Codes once the affected district is confirmed.
local bad_row 520

replace UDistrict = "" in `bad_row'
foreach v of varlist match_score ID_RBI                        ///
                     sbi_2005 onb_2005 fb_2005 rrb_2005        ///
                     scb_2005 all_2005 pop_total Underbanked   ///
                     ratio_banks B_Ratio_Dist_Ex above_thresh {
    replace `v' = . in `bad_row'
}
replace _merge = 1 in `bad_row'

* Store the NSS code as a zero-padded 4-character string under its original
* name, and drop the linkage helper variables
gen str4 NSS_Code = string(NSS_70_Codes, "%04.0f")
drop NSS_70_Codes UState_UT UDistrict MOF_Kim match_score
rename NSS_Code NSS_70_Codes
	
	
*------------------------------------------------------------------------------
* 4. Check - Are all the 583 districts mapped?
*------------------------------------------------------------------------------
* Diagnostic only: everything between preserve and restore is undone again.
* Keep one row per ID_RBI, merge back against the full RBI–Census file and
* list every district that did not pair up (_merge != 3).

preserve

    count

    duplicates drop ID_RBI, force

    * The reclink _merge must go before merge can create its own
    drop _merge

    merge 1:1 ID_RBI using "Data/MOF_Data/RBI_2005_Q1/MOF_Census_2005_Q1.dta"

    tab District if _merge != 3

restore


count
* There are 583 districts. Therefore, all districts are mapped.
* Chirang, Tiruppur, Baksa, Pratapgarh and Mohali are the 5 districts in 2013
* that have multiple parents, so the observations related to them are
* excluded from the analysis (presumably via the unmatched-row drop below -
* confirm).

* Keep only NSS districts that were linked to an RBI district
drop if _merge != 3
drop _merge

* Underbanked is the treatment indicator in the analysis
rename Underbanked treatment

* Forward slashes keep the output path portable across platforms
save "Data/MOF_Census_NSS_70", replace